{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3f9dc570-b0f5-46d4-ac27-415ec949e380",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import statsmodels.formula.api as smf\n",
    "import statsmodels.api as sm\n",
    "import matplotlib.pyplot as plt\n",
    "from scipy import stats\n",
    "from semopy import Model\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d13939eb-e4ef-48df-a425-48959117e016",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_excel(r\"E:\\df.xlsx\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4cf909e9-1f26-4706-ab7d-dd22b082f928",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "df[\"democrat\"] = (df[\"party\"] == 1).astype(int)\n",
    "df[\"nonpartisan\"] = (df[\"party\"] == 4).astype(int)\n",
    "df[\"region_label\"] = np.where(df[\"origin_jeolla\"] == 1, \"Jeolla\", \"Gyeongsang\")\n",
    "\n",
    "plt.rcParams[\"font.size\"] = 11\n",
    "\n",
    "disc_jeolla = df[df.origin_jeolla == 1][\"disc_index\"]\n",
    "disc_gyeong = df[df.origin_jeolla == 0][\"disc_index\"]\n",
    "\n",
    "t_stat, p_val = stats.ttest_ind(disc_jeolla, disc_gyeong, equal_var=False)\n",
    "\n",
    "print(f\"Jeolla average = {disc_jeolla.mean():.3f}, Gyeongsang average = {disc_gyeong.mean():.3f}\")\n",
    "print(f\"T-test: t = {t_stat:.3f}, p = {p_val:.4f}\")\n",
    "\n",
    "plt.figure(figsize=(6,3.2))\n",
    "\n",
    "\n",
    "means = [disc_gyeong.mean(), disc_jeolla.mean()]\n",
    "ses   = [disc_gyeong.std(ddof=1)/np.sqrt(len(disc_gyeong)),\n",
    "         disc_jeolla.std(ddof=1)/np.sqrt(len(disc_jeolla))]\n",
    "sds   = [disc_gyeong.std(ddof=1),\n",
    "         disc_jeolla.std(ddof=1)]\n",
    "\n",
    "x_pos = [0, 1]\n",
    "labels = [\"Gyeongsang\", \"Jeolla\"]\n",
    "\n",
    "bars = plt.bar(x_pos, means, yerr=ses,\n",
    "               tick_label=labels,\n",
    "               color=[\"gray\",\"red\"],\n",
    "               capsize=5)\n",
    "\n",
    "plt.ylabel(\"Mean discrimination index\", fontsize=12)\n",
    "#plt.title(\"H1: Discrimination level by origin\", fontsize=13)\n",
    "\n",
    "plt.ylim(2.2, 3.4)\n",
    "\n",
    "\n",
    "plt.grid(axis='y', linestyle='--', alpha=0.5)\n",
    "\n",
    "\n",
    "for i, (bar, m, sd) in enumerate(zip(bars, means, sds)):\n",
    "    height = bar.get_height()\n",
    "    plt.text(bar.get_x() + bar.get_width()/2,\n",
    "             height + 0.03,     \n",
    "             f\"{m:.2f}\\n(±{sd:.2f})\",\n",
    "             ha='center', va='bottom', fontsize=12)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.savefig(\"figure_1.png\", dpi=600, bbox_inches=\"tight\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "18222b1f-4a18-46eb-8daf-9fa987b0edd9",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "24f098bc-11da-4878-a225-4406c332abd1",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "df[\"democrat\"] = (df[\"party\"] == 1).astype(int)\n",
    "df[\"nonpartisan\"] = (df[\"party\"] == 4).astype(int)\n",
    "df[\"region_label\"] = np.where(df[\"origin_jeolla\"] == 1, \"Jeolla\", \"Gyeongsang\")\n",
    "\n",
    "controls = [\"edu_level\", \"income\", \"home_owner\", \"marital_status\", \"age\", \"ideology\", \"stay_capital\", \"sex\"]\n",
    "control_str = \" + \".join(controls)\n",
    "\n",
    "# sido fixed effects\n",
    "fe_sido = \"C(sido)\"\n",
    "\n",
    "plt.rcParams[\"font.size\"] = 11\n",
    "\n",
    "formula_dem = f\"democrat ~ disc_index * origin_jeolla + {control_str} + {fe_sido}\"\n",
    "\n",
    "model_dem = smf.glm(\n",
    "    formula_dem,\n",
    "    data=df,\n",
    "    family=sm.families.Binomial()\n",
    ").fit()\n",
    "\n",
    "print(\"\\n===== H4: 민주당 GLM 결과 =====\")\n",
    "print(model_dem.summary())\n",
    "\n",
    "disc_grid = np.linspace(df[\"disc_index\"].min(), df[\"disc_index\"].max(), 50)\n",
    "\n",
    "ref_sido = df[\"sido\"].mode()[0]\n",
    "\n",
    "def make_pred(region_value):\n",
    "    pred = pd.DataFrame({\"disc_index\": disc_grid, \"origin_jeolla\": region_value})\n",
    "    for c in controls:\n",
    "        pred[c] = df[c].mean()\n",
    "    pred[\"sido\"] = ref_sido\n",
    "    return pred\n",
    "\n",
    "pred_j = make_pred(1)  # Jeolla\n",
    "pred_g = make_pred(0)  # Gyeongsang\n",
    "\n",
    "pred_j_dem = model_dem.get_prediction(pred_j).summary_frame(alpha=0.05)\n",
    "pred_g_dem = model_dem.get_prediction(pred_g).summary_frame(alpha=0.05)\n",
    "\n",
    "pred_j[\"p_dem\"]   = pred_j_dem[\"mean\"]\n",
    "pred_j[\"dem_low\"] = pred_j_dem[\"mean_ci_lower\"]\n",
    "pred_j[\"dem_high\"]= pred_j_dem[\"mean_ci_upper\"]\n",
    "\n",
    "pred_g[\"p_dem\"]   = pred_g_dem[\"mean\"]\n",
    "pred_g[\"dem_low\"] = pred_g_dem[\"mean_ci_lower\"]\n",
    "pred_g[\"dem_high\"]= pred_g_dem[\"mean_ci_upper\"]\n",
    "\n",
    "# ---------------------------------------------------\n",
    "formula_np = f\"nonpartisan ~ disc_index * origin_jeolla + {control_str} + {fe_sido}\"\n",
    "\n",
    "model_np = smf.glm(\n",
    "    formula_np,\n",
    "    data=df,\n",
    "    family=sm.families.Binomial()\n",
    ").fit()\n",
    "\n",
    "print(model_np.summary())\n",
    "\n",
    "pred_j_np = model_np.get_prediction(pred_j).summary_frame(alpha=0.05)\n",
    "pred_g_np = model_np.get_prediction(pred_g).summary_frame(alpha=0.05)\n",
    "\n",
    "pred_j[\"p_np\"]   = pred_j_np[\"mean\"]\n",
    "pred_j[\"np_low\"] = pred_j_np[\"mean_ci_lower\"]\n",
    "pred_j[\"np_high\"]= pred_j_np[\"mean_ci_upper\"]\n",
    "\n",
    "pred_g[\"p_np\"]   = pred_g_np[\"mean\"]\n",
    "pred_g[\"np_low\"] = pred_g_np[\"mean_ci_lower\"]\n",
    "pred_g[\"np_high\"]= pred_g_np[\"mean_ci_upper\"]\n",
    "\n",
    "# ---------------------------------------------------\n",
    "# 1x2 subplot: (a) Democrat, (b) Nonpartisan\n",
    "# ---------------------------------------------------\n",
    "fig, axes = plt.subplots(1, 2, figsize=(12, 5))\n",
    "\n",
    "# -------- (a) Democrat --------\n",
    "ax = axes[0]\n",
    "\n",
    "ax.plot(disc_grid, pred_j[\"p_dem\"],\n",
    "        color=\"red\", label=\"Jeolla\", linewidth=2, linestyle=\"-\")\n",
    "ax.fill_between(disc_grid, pred_j[\"dem_low\"], pred_j[\"dem_high\"],\n",
    "                color=\"red\", alpha=0.2)\n",
    "\n",
    "ax.plot(disc_grid, pred_g[\"p_dem\"],\n",
    "        color=\"gray\", label=\"Gyeongsang\", linewidth=2, linestyle=\"--\")\n",
    "ax.fill_between(disc_grid, pred_g[\"dem_low\"], pred_g[\"dem_high\"],\n",
    "                color=\"gray\", alpha=0.2)\n",
    "\n",
    "ax.set_ylim(0, 1)\n",
    "ax.set_xlabel(\"Discrimination index\", fontsize=14)\n",
    "ax.set_ylabel(\"Predicted P(Democrat)\", fontsize=14)\n",
    "ax.set_title(\"(a) Democrat model\\nDiscrimination × Origin\", fontsize=15.5, pad=11)\n",
    "ax.legend(fontsize=12)\n",
    "ax.tick_params(axis='both', labelsize=10)\n",
    "ax.grid(axis='y', linestyle='--', alpha=0.4)\n",
    "\n",
    "# -------- (b) Nonpartisan --------\n",
    "ax2 = axes[1]\n",
    "\n",
    "ax2.plot(disc_grid, pred_g[\"p_np\"],\n",
    "         color=\"gray\", label=\"Gyeongsang\", linewidth=2, linestyle=\"--\")\n",
    "ax2.fill_between(disc_grid, pred_g[\"np_low\"], pred_g[\"np_high\"],\n",
    "                 color=\"gray\", alpha=0.2)\n",
    "\n",
    "ax2.plot(disc_grid, pred_j[\"p_np\"],\n",
    "         color=\"red\", label=\"Jeolla\", linewidth=2, linestyle=\"-\")\n",
    "ax2.fill_between(disc_grid, pred_j[\"np_low\"], pred_j[\"np_high\"],\n",
    "                 color=\"red\", alpha=0.2)\n",
    "\n",
    "ax2.set_ylim(0, 1)\n",
    "ax2.set_xlabel(\"Discrimination index\", fontsize=14)\n",
    "ax2.set_ylabel(\"Predicted P(Nonpartisan)\", fontsize=14)\n",
    "ax2.set_title(\"(b) Nonpartisan model\\nDiscrimination × Origin\", fontsize=15.5, pad=11)\n",
    "ax2.legend(fontsize=12)\n",
    "ax2.tick_params(axis='both', labelsize=10)\n",
    "ax2.grid(axis='y', linestyle='--', alpha=0.4)\n",
    "\n",
    "plt.tight_layout() \n",
    "plt.savefig(\"figure_2.png\", dpi=600, bbox_inches=\"tight\") \n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6b217f9e-0dd9-47b4-bbb2-5d83db2f6250",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "403c9b01-b6d4-4872-81fb-70af2d5fadb3",
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, axes = plt.subplots(1, 2, figsize=(12, 5))\n",
    "\n",
    "# -------- (a) Democrats --------\n",
    "\n",
    "ax = axes[0]\n",
    "\n",
    "ax.plot(disc_grid, pred_j[\"p_dem\"],\n",
    "        color=\"red\", label=\"Jeolla\", linewidth=2, linestyle=\"-\")\n",
    "ax.fill_between(disc_grid, pred_j[\"dem_low\"], pred_j[\"dem_high\"],\n",
    "                color=\"red\", alpha=0.2)\n",
    "\n",
    "ax.plot(disc_grid, pred_g[\"p_dem\"],\n",
    "        color=\"gray\", label=\"Gyeongsang\", linewidth=2, linestyle=\"--\")\n",
    "ax.fill_between(disc_grid, pred_g[\"dem_low\"], pred_g[\"dem_high\"],\n",
    "                color=\"gray\", alpha=0.2)\n",
    "\n",
    "ax.set_ylim(0, 1)\n",
    "ax.set_xlabel(\"Discrimination index\", fontsize=14)\n",
    "ax.set_ylabel(\"Predicted P(Democrat)\", fontsize=14)\n",
    "ax.set_title(\"(a) Democrat model\\nDiscrimination × Origin\", fontsize=15, pad=10)\n",
    "ax.legend(fontsize=12)\n",
    "ax.tick_params(axis='both', labelsize=10)\n",
    "ax.grid(axis='y', linestyle='--', alpha=0.4)\n",
    "\n",
    "x_min, x_max = disc_grid[0], disc_grid[-1]\n",
    "y_j_min = pred_j[\"p_dem\"].iloc[0]\n",
    "y_j_max = pred_j[\"p_dem\"].iloc[-1]\n",
    "ax.text(x_min, y_j_min + 0.03, f\"{y_j_min:.2f}\", color=\"red\", ha=\"left\", va=\"bottom\", fontsize=10)\n",
    "ax.text(x_max, y_j_max + 0.02, f\"{y_j_max:.2f}\", color=\"red\", ha=\"right\", va=\"bottom\", fontsize=10)\n",
    "\n",
    "y_g_min = pred_g[\"p_dem\"].iloc[0]\n",
    "y_g_max = pred_g[\"p_dem\"].iloc[-1]\n",
    "ax.text(x_min, y_g_min + 0.05, f\"{y_g_min:.2f}\", color=\"gray\", ha=\"left\", va=\"top\", fontsize=10)\n",
    "ax.text(x_max, y_g_max + 0.05, f\"{y_g_max:.2f}\", color=\"gray\", ha=\"right\", va=\"top\", fontsize=10)\n",
    "\n",
    "\n",
    "# -------- (b) Nonpartisan --------\n",
    "ax2 = axes[1]\n",
    "\n",
    "ax2.plot(disc_grid, pred_g[\"p_np\"],\n",
    "         color=\"gray\", label=\"Gyeongsang\", linewidth=2, linestyle=\"--\")\n",
    "ax2.fill_between(disc_grid, pred_g[\"np_low\"], pred_g[\"np_high\"],\n",
    "                 color=\"gray\", alpha=0.2)\n",
    "\n",
    "ax2.plot(disc_grid, pred_j[\"p_np\"],\n",
    "         color=\"red\", label=\"Jeolla\", linewidth=2, linestyle=\"-\")\n",
    "ax2.fill_between(disc_grid, pred_j[\"np_low\"], pred_j[\"np_high\"],\n",
    "                 color=\"red\", alpha=0.2)\n",
    "\n",
    "ax2.set_ylim(0, 1)\n",
    "ax2.set_xlabel(\"Discrimination index\", fontsize=14)\n",
    "ax2.set_ylabel(\"Predicted P(Nonpartisan)\", fontsize=14)\n",
    "ax2.set_title(\"(b) Nonpartisan model\\nDiscrimination × Origin\", fontsize=15, pad=10)\n",
    "ax2.legend(fontsize=12)\n",
    "ax2.tick_params(axis='both', labelsize=10)\n",
    "ax2.grid(axis='y', linestyle='--', alpha=0.4)\n",
    "\n",
    "# Gyeongsang\n",
    "y_g_min_np = pred_g[\"p_np\"].iloc[0]\n",
    "y_g_max_np = pred_g[\"p_np\"].iloc[-1]\n",
    "ax2.text(x_min, y_g_min_np - 0.04, f\"{y_g_min_np:.2f}\", color=\"gray\", ha=\"left\", va=\"bottom\", fontsize=10)\n",
    "ax2.text(x_max, y_g_max_np - 0.06, f\"{y_g_max_np:.2f}\", color=\"gray\", ha=\"right\", va=\"bottom\", fontsize=10)\n",
    "\n",
    "# Jeolla\n",
    "y_j_min_np = pred_j[\"p_np\"].iloc[0]\n",
    "y_j_max_np = pred_j[\"p_np\"].iloc[-1]\n",
    "ax2.text(x_min, y_j_min_np - 0.04, f\"{y_j_min_np:.2f}\", color=\"red\", ha=\"left\", va=\"top\", fontsize=10)\n",
    "ax2.text(x_max, y_j_max_np - 0.03, f\"{y_j_max_np:.2f}\", color=\"red\", ha=\"right\", va=\"top\", fontsize=10)\n",
    "\n",
    "plt.tight_layout() \n",
    "plt.savefig(\"figure_2.png\", dpi=600, bbox_inches=\"tight\") \n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a689a4b1-bd3a-4fc1-b3a7-76d2d549ce14",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d9b22d41-700c-4ca0-8e01-c67f32b4d5ba",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
